library(TDA)
library(tdaTS) #our package
library(tibble)
library(plotly) #3D plotting
library(fields) #image.plot functionWe propose to compare the persistence diagram or the barcodes of data generated with the same topology.
We need to find a distance between barcodes close to zero even when the barcodes look different (after some kind a rescaling…)
myplots <- function(data, complex = "alpha")
{
if(complex == "alpha")
{
DiagAlphaCmplx <- alphaComplexDiag(X = data,
library = c("GUDHI", "Dionysus"),
location = TRUE)
}
if(complex == "rips")
{
DiagAlphaCmplx <- ripsDiag(X = data,
maxdimension = 1,
maxscale = 1,
dist = "euclidean",
library = c("GUDHI", "Dionysus"),
location = TRUE)
}
plot(DiagAlphaCmplx$diagram, barcode = TRUE)
plot(data, col = 1,xaxt="n", yaxt="n",xlab="", ylab="", asp = 1)
one <- which(DiagAlphaCmplx[["diagram"]][, 1] == 1)
for (i in seq(along = one))
{
for (j in seq_len(dim(DiagAlphaCmplx[["cycleLocation"]][[one[i]]])[1]))
{lines(DiagAlphaCmplx[["cycleLocation"]][[one[i]]][j, , ] + rnorm(n = 4,sd = 0.0),cex = 1, col = i + 1)}
}
return(DiagAlphaCmplx$diagram)
}pointSquareHole <- function(n, Hole_Relative_length = 0)
{
len <- sqrt(Hole_Relative_length)
a <- (1-Hole_Relative_length)/2
b <- (1+Hole_Relative_length)/2
mat <- matrix(NA,nrow = 0, ncol = 2)
while(nrow(mat) < n)
{
u <- runif(2)
if(!(u[1] > a & u[1] < b & u[2] > a & u[2] < b)){mat <- rbind(mat, u)}
}
res <- mat %>%
as_tibble(.name_repair = "minimal") %>%
setNames(c("x","y"))
return(res)
}data <- pointSquareHole(5000, 0.075)
pl <- myplots(data)data <- pointSquareHole(5000, 0.15)
pl <- myplots(data)data <- pointSquareHole(5000, 0.3)
pl <- myplots(data)CONCLUSION 1: We need a distance between barcodes, which doesn’t take into account the death time of the hole (the most persistent elements).
data <- pointSquareHole(50, 0.3)
pl <- myplots(data)data <- pointSquareHole(500, 0.3)
pl <- myplots(data)data <- pointSquareHole(5000, 0.3)
pl <- myplots(data)We need a distance between barcodes, which doesn’t detect differences bertween the 3 barcodes.
CONCLUSION 2: less point = less information. The distance has to take the number of points into account
pointSquareHole(50, 0.3) contains a hole but we barely
can see it.
data <- pointSquareHole(100, 0)
pl <- myplots(data)data <- pointSquareHole(300, 0)
pl <- myplots(data)data <- pointSquareHole(1000, 0)
pl <- myplots(data)CONCLUSION 3: we see a kind of limit distribution for H1 elements…
pointCircleGap <- function(n, gap)
{
gap <- 2 * pi * gap
t <- runif(n, min = 0, max = 2 * pi - gap)
x <- cos(t)
y <- sin(t)
res <- matrix(c(x,y), nrow = n, byrow = FALSE) %>%
as_tibble(.name_repair = "minimal") %>%
setNames(c("x","y"))
return(res)
}data <- pointCircleGap(100, 0)
pl <- myplots(data)data <- pointCircleGap(100, 0.05)
pl <- myplots(data)data <- pointCircleGap(100, 0.1)
pl <- myplots(data)data <- pointCircleGap(100, 0.3)
pl <- myplots(data)The same with additional Gaussian noise.
data<- pointCircleGap(100, 0)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)data <- pointCircleGap(100, 0.1)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)data <- pointCircleGap(100, 0.2)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)data <- pointCircleGap(100, 0.3)
data$x <- data$x + rnorm(n= 100, mean = 0, sd = 0.1)
data$y <- data$y + rnorm(n = 100, mean = 0, sd = 0.1)
pl <- myplots(data)If no gap, the H1 main element appears at the same time a the last fusion in H0 elements.
The bigger the gap, the bigger the delay in H1.
CONCLUSION 4: we need a distance that is not sensitive to the gap length between fusion of all H0 and appearence of H1. It has to be only sensitive to the presence/absence of a gap.
data <- pointSquareHole(100,0)
pl <- myplots(data, complex = "alpha")pl <- myplots(data, complex = "rips")Gaussian versus Uniform noise
data<- pointSquareHole(500, 0)
pl <- myplots(data, complex = "alpha")data<- data.frame(matrix(0, nrow = 500, ncol = 2))
colnames(data) <- c("x", "y")
data$x <- data$x + rnorm(n= 500, mean = 0, sd = 10)
data$y <- data$y + rnorm(n = 500, mean = 0, sd = 10)
pl <- myplots(data, complex = "alpha")Alpha diagrams (Rips doesn’t work…)
data<- pointSquareHole(10000, 0)
diag1 <- myplots(data)data<- data.frame(matrix(0, nrow = 10000, ncol = 2))
colnames(data) <- c("x", "y")
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 10)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 10)
diag2 <- myplots(data)sel <- diag1[,1] == 1
res1 <- diag1[sel,3]/diag1[sel,2]
sel <- diag2[,1] == 1
res2 <- diag2[sel,3]/diag2[sel,2]Gumbel distribution?
hist(log(log(res1)) - mean(log(log(res1))), breaks = 200)hist(log(log(res2))- mean(log(log(res1))), breaks = 200)Max values
max(res1)## [1] 9.439811
max(res2)## [1] 7.666121
The Square with a hole?
data <- pointSquareHole(10000, 0.3)
diag3 <- myplots(data)sel <- diag3[,1] == 1
res3 <- diag3[sel,3]/diag3[sel,2]
hist(log(log(res3)) - mean(log(log(res3))), breaks = 200)Max value
max(res3)## [1] 584.8804
The circle closed + small noise
data <- pointCircleGap(10000, 0)
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 0.01)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 0.01)
diag3 <- myplots(data)sel <- diag3[,1] == 1
res3 <- diag3[sel,3]/diag3[sel,2]
hist(log(log(res3)) - mean(log(log(res3))), breaks = 200)Max value
max(res3)## [1] 56973.07
The circle not closed + small noise
data <- pointCircleGap(10000, 0.03)
data$x <- data$x + rnorm(n= 10000, mean = 0, sd = 0.01)
data$y <- data$y + rnorm(n = 10000, mean = 0, sd = 0.01)
diag3 <- myplots(data)sel <- diag3[,1] == 1
res3 <- diag3[sel,3]/diag3[sel,2]
hist(log(log(res3)) - mean(log(log(res3))), breaks = 200)Max value
max(res3)## [1] 154.1639
We need a distance between barcodes, which doesn’t take into account the death time of the hole (the most persistent elements)
The distance should be focused on what’s happening just after H0 fusions: presence or absence of a gap between main fusions and appearence og H1 elements.
less point = less information. The distance has to take the quantiy of information into account